import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller

print("--- STARTING PHASE 4: INDIA MODEL ANALYSIS ---")

# --- 1. Load Your Clean Dataset ---
# The first CSV column becomes the index and is parsed as dates.
try:
    model_df = pd.read_csv('GSSI_model_dataset_INDIA.csv', index_col=0, parse_dates=True)
except FileNotFoundError:
    print("---!!! FATAL ERROR !!!---")
    print("File 'GSSI_model_dataset_INDIA.csv' not found.")
    print("Please run the 'Script for India Model (v18)' first.")
    # Abort with a non-zero status so shells and pipelines can see the failure.
    # SystemExit is raised directly because the bare exit() helper is injected
    # by the `site` module for interactive use and is not guaranteed to exist.
    raise SystemExit(1)
else:
    print("Dataset 'GSSI_model_dataset_INDIA.csv' loaded successfully.")

# --- 2. Create the 'Lead' Variable ---
# Every row receives the recession flag of the row that follows it. Rows with
# no following value (at minimum the final row) end up NaN and are dropped.
next_period_flag = model_df['NBER_RECESSION'].shift(periods=-1)
model_df = model_df.assign(Recession_Next_Q=next_period_flag).dropna(
    subset=['Recession_Next_Q']
)

# --- 3. Descriptive Visual ---
print("\nGenerating descriptive plot (India MX/LX Ratio vs. Recessions)...")
fig, ax1 = plt.subplots(figsize=(14, 7))

# Line series: the MX/LX ratio over the dataset's index.
ratio_series = model_df['MX_LX_Ratio']
ax1.plot(ratio_series.index, ratio_series, color='blue', label='MX/LX Ratio (GSSI-India)')
ax1.set_xlabel('Year')
ax1.set_ylabel('GSSI (MX/LX Ratio)')

# Shaded bands: x is in data coordinates while y is in axes coordinates (0..1),
# so each band spans the full height of the plot wherever the flag equals 1.
in_recession = model_df['NBER_RECESSION'] == 1
ax1.fill_between(
    model_df.index,
    0,
    1,
    where=in_recession,
    color='red',
    alpha=0.3,
    transform=ax1.get_xaxis_transform(),
    label='India Recession/Slowdown',
)

ax1.legend(loc='upper left')
ax1.set_title('India GSSI vs. Recession/Slowdown Periods (2004-2024)')
ax1.grid(True)
fig.savefig('GSSI_India_vs_Recessions_Plot.png')
print("...Plot saved as 'GSSI_India_vs_Recessions_Plot.png'.")

# --- 4. Model 1: The Full Logit Regression (The Core Test) ---
print("\n--- Running India Full Logit Model (Horserace) ---")

# Design matrix: the ratio plus the VIX and market-return controls, with an
# intercept column added. Rows with any missing regressor are removed, and the
# target is then restricted to the surviving rows so both share one index.
regressor_columns = ['MX_LX_Ratio', 'VIX', 'Market_Return']
X = sm.add_constant(model_df[regressor_columns]).dropna()
Y = model_df['Recession_Next_Q'].loc[X.index]

try:
    # disp=0 silences the optimizer's convergence printout.
    logit_model = sm.Logit(Y, X).fit(disp=0)
    print(logit_model.summary())
except Exception as e:
    # Best-effort: report the failure and let the rest of the script continue.
    print("--- Logit Model Failed ---")
    print(f"Error: {e}")
    print("This can be due to too few recession '1's in your 'India_Recessions.csv' file.")

# --- 5. Model 2: Granger Causality (QJE Standard) ---
print("\n--- Running India Granger Causality Test ---")

try:
    # VAR/Granger needs stationary inputs: run an ADF test on the ratio and
    # first-difference it when the unit-root null cannot be rejected at 5%.
    # NOTE(review): only the ratio is tested; GDP_GROWTH and VIX enter the VAR
    # as-is -- confirm they are stationary in the upstream dataset.
    adf_test = adfuller(model_df['MX_LX_Ratio'].dropna())
    print(f"ADF test p-value: {adf_test[1]:.4f}.")
    if adf_test[1] > 0.05:  # p-value > 0.05 means non-stationary
        print("Ratio is non-stationary. Using first difference.")
        model_df['Ratio_stationary'] = model_df['MX_LX_Ratio'].diff()
    else:
        print("Ratio is stationary. Using as-is.")
        model_df['Ratio_stationary'] = model_df['MX_LX_Ratio']

    # Three-variable system with VIX as the control; rows with any missing
    # value (including the first row after differencing) are dropped.
    var_data = model_df[['GDP_GROWTH', 'Ratio_stationary', 'VIX']].dropna()
    var_model = VAR(var_data)
    var_results = var_model.fit(maxlags=4, ic='aic')

    # AIC selection may settle on 0 lags, and a Granger test is undefined
    # without lagged terms. Fall back to the smallest usable order instead of
    # failing with an error that would be misreported as a data problem.
    if var_results.k_ar == 0:
        print("AIC selected 0 lags; refitting VAR with 1 lag so the Granger test can run.")
        var_results = var_model.fit(maxlags=1)

    # H0: Ratio does NOT Granger-cause GDP_GROWTH
    test_result = var_results.test_causality('GDP_GROWTH', 'Ratio_stationary', kind='f')
    print("\nGranger Test (H0: GSSI-India does not cause GDP Growth):")
    print(f"P-value: {test_result.pvalue:.4f}")

except Exception as e:
    # Best-effort: report the failure and still reach the completion banner.
    print("--- Granger Causality Failed ---")
    print(f"Error: {e}")
    print("This is often due to missing data in the GDP or VIX series.")

print("\n--- INDIA ANALYSIS COMPLETE ---")